In [65]:
import os
from collections import defaultdict
import torch
import numpy as np
import scipy.stats
from torch.distributions import constraints
from matplotlib import pyplot
import random

import pyro
import pyro.distributions as dist
from pyro import poutine
from pyro.infer.autoguide import AutoDelta
from pyro.optim import Adam
from pyro.infer import SVI, TraceEnum_ELBO, config_enumerate, infer_discrete
from pyro.ops.indexing import Vindex
from pyro.infer import MCMC, NUTS

## Data preparation and cleaning
# importing required packages
import pyreadr
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
%autosave 30

import umap
import plotly
import plotly.graph_objs as go

import pickle

pyro.enable_validation(True)
Autosaving every 30 seconds

Data preparation and visualization

In [2]:
# import data and convert to python objects
# load Rdata object
rdata = pyreadr.read_r('tomtom_data/det_dfs.Rdata')
# pull out separate dataframes, one with choice text, the other with numeric responses
df = rdata['df']
dfn = rdata['dfn']
df.head()


# read in state labels
states = pd.read_csv('tomtom_data/states.csv')
states['statepair'] = states['state1'] + '_' + states['state2']# construct state pair strings
states.head()

# extract all the self ratings
ind_selfq1 = df.columns.get_loc('X1_Q12_1') # location of the first question
trans_self = df.iloc[:,list(range(ind_selfq1,(ind_selfq1+75)))]
print('shape of self-rating dataframe: ',trans_self.shape)
trans_self.columns = states['statepair'].tolist() # renaming transitioin columns with the corresponding transition pairs
#print(trans_self.columns[1:5])
#trans_self.head()
# extract all the specific target ratings
ind_targq1 = df.columns.get_loc('X1_Q9_1') # location of the first question
trans_targ = df.iloc[:,list(range(ind_targq1,(ind_targq1+75)))]
print('shape of target-rating dataframe: ', trans_targ.shape)
trans_targ.columns = states['statepair'].tolist() # renaming transitioin columns with the corresponding transition pairs
#print(trans_targ.columns[1:5])
#trans_targ.head()
# extract all the group level ratings
ind_avgq1 = df.columns.get_loc('X1_Q11_1') # location of the first question
trans_avg = df.iloc[:,list(range(ind_avgq1,(ind_avgq1+75)))]
print('shape of group-rating dataframe: ', trans_avg.shape)
trans_avg.columns = states['statepair'].tolist() # renaming transitioin columns with the corresponding transition pairs
#print(trans_avg.columns[1:5])
#trans_targ.head()
shape of self-rating dataframe:  (94, 75)
shape of target-rating dataframe:  (94, 75)
shape of group-rating dataframe:  (94, 75)
In [3]:
# inspect the "spread" of target vs group ratings using pairwise correlation between subjects
# getting pairwise correlations
trans_self_corbtsub = trans_self.transpose().corr()
trans_targ_corbtsub = trans_targ.transpose().corr()
trans_avg_corbtsub = trans_avg.transpose().corr()

# get the mean pairwise correlation for each of the three sets of correlations
def mean_pairwise_corr(corbtsub):
    corbtsub = np.array(corbtsub)
    corbtsub_flat = corbtsub[np.tril_indices(corbtsub.shape[0],-1)] # gett lower triangle and flatten
#     print(corbtsub_flat.shape)
    return corbtsub_flat, np.nanmean(corbtsub_flat)

trans_self_pairwiseflat, trans_self_meanpairwise = mean_pairwise_corr(trans_self_corbtsub)
trans_targ_pairwiseflat, trans_targ_meanpairwise = mean_pairwise_corr(trans_targ_corbtsub)
trans_avg_pairwiseflat, trans_avg_meanpairwise = mean_pairwise_corr(trans_avg_corbtsub)

print('The spread of self ratings: ', trans_self_meanpairwise)
print('The spread of target ratings: ', trans_targ_meanpairwise)
print('The spread of group ratings: ', trans_avg_meanpairwise)

scipy.stats.ttest_rel(trans_targ_pairwiseflat,trans_avg_pairwiseflat, nan_policy='omit')
The spread of self ratings:  0.5710433236550672
The spread of target ratings:  0.5042138201358816
The spread of group ratings:  0.6135978109836753
Out[3]:
Ttest_relResult(statistic=-44.555126397363594, pvalue=0.0)
In [4]:
# Additional way to look at the spread of specific vs group: looking at the variance for each of the items
# the graph shows less spread at the item level for group ratings than for self or target specific ratings.
itemvar_trans_self = trans_self.var(axis = 0)
itemvar_trans_targ = trans_targ.var(axis = 0)
itemvar_trans_avg = trans_avg.var(axis = 0)

sns.distplot(itemvar_trans_self, label='Self')
sns.distplot(itemvar_trans_targ, label='Target')
sns.distplot(itemvar_trans_avg, label='Group')
plt.legend()
plt.show()
In [5]:
# reusable code for pre processing each of the three sets of ratings into pyro compatible format
def data_transform(trans):
    # set the constant for converting probability to frequency
    freq_constant = 10000
    # indexing autotransition columns
    colnames = trans.columns.tolist()
    cnsplit = [p.split('_') for p in colnames]
    idx_autotransition = [p[0] == p[1] for p in cnsplit] # list of boolean, True = is autotransition
    
    # 1. normalizing with autotransitions included, one df for probability, one converted to frequency
    # initialize 2 dataframes
    t_norm_all = pd.DataFrame(columns=trans.columns, index = trans.index)
    t_norm_all_f =  pd.DataFrame(columns=trans.columns, index = trans.index)
    
    # normalize by row-sum every five columns, since the columns are already arranged by from-state in 5
    for i in range(0,trans.shape[1],5):
        dftemp = trans.iloc[:,i:(i+5)]
        dftemp_rowsum = dftemp.sum(axis = 1)
        normed_cols = dftemp/dftemp_rowsum[:,np.newaxis]
        t_norm_all.iloc[:,i:(i+5)] = normed_cols
        t_norm_all_f.iloc[:,i:(i+5)] = (normed_cols * freq_constant).round()
        
    # 2. two additional dataframes: normed with auto transition but don't contain them
    t_norm_all_noauto = t_norm_all.loc[:,[not t for t in idx_autotransition]]
    t_norm_all_noauto_f = t_norm_all_f.loc[:,[not t for t in idx_autotransition]]

    # 3. finally, normalizing without autotransitions, and also convert to frequency
    trans_noauto = trans.loc[:,[not t for t in idx_autotransition]]
    t_norm_noauto = pd.DataFrame(columns=trans_noauto.columns, index = trans_noauto.index)
    t_norm_noauto_f = pd.DataFrame(columns=trans_noauto.columns, index = trans_noauto.index)

    # normalize by row-sum every FOUR columns, grouped by from-state in 4 without autotransition
    for i in range(0,trans_noauto.shape[1],4):
        dftemp = trans_noauto.iloc[:,i:(i+4)]
        dftemp_rowsum = dftemp.sum(axis = 1)
        normed_cols = dftemp/dftemp_rowsum[:,np.newaxis]
        t_norm_noauto.iloc[:,i:(i+5)] = normed_cols
        t_norm_noauto_f.iloc[:,i:(i+5)] = (normed_cols * freq_constant).round()
        
    t_norm_all_3d = torch.tensor(np.stack([np.array(t_norm_all.iloc[0]).reshape(15,5) 
                                        for i in np.arange(t_norm_all.shape[0])]).astype('float32'))
    t_norm_noauto_3d = torch.tensor(np.stack([np.array(t_norm_noauto.iloc[0]).reshape(15,4) 
                                            for i in np.arange(t_norm_noauto.shape[0])]).astype('float32'))
    t_raw_all_3d = torch.tensor(np.stack([np.array(trans.iloc[0]).reshape(15,5) 
                                            for i in np.arange(trans.shape[0])]).astype('float32'))
    t_raw_noauto_3d = torch.tensor(np.stack([np.array(trans_noauto.iloc[0]).reshape(15,4) 
                                            for i in np.arange(trans_noauto.shape[0])]).astype('float32'))
    
    return t_norm_all_3d, t_norm_noauto_3d, t_raw_all_3d/100, t_raw_noauto_3d/100
In [21]:
global tself_norm_all_3d, tself_norm_noauto_3d, tself_raw_all_3d, tself_raw_noauto_3d
global ttarg_norm_all_3d, ttarg_norm_noauto_3d, ttarg_raw_all_3d, ttarg_raw_noauto_3d
global tavg_norm_all_3d, tavg_norm_noauto_3d, tavg_raw_all_3d, tavg_raw_noauto_3d
tself_norm_all_3d, tself_norm_noauto_3d, tself_raw_all_3d, tself_raw_noauto_3d = data_transform(trans_self)
ttarg_norm_all_3d, ttarg_norm_noauto_3d, ttarg_raw_all_3d, ttarg_raw_noauto_3d = data_transform(trans_targ)
tavg_norm_all_3d, tavg_norm_noauto_3d, tavg_raw_all_3d, tavg_raw_noauto_3d = data_transform(trans_avg)

Function definition

In [75]:
# define a model function that's dynamically declared
@config_enumerate()
def model(data):
    # Background probability of different groups (assume equally likely)
    weights = pyro.sample('weights', dist.Dirichlet(0.5 * torch.ones(K)))
    # declare number of columns based on whether auto-transitions are included
    if auto == 'all':
        ncol = 5
    elif auto == 'noauto':
        ncol = 4
            
    # declare model parameters based on whether the data are row-normalized
    if norm == 'norm':
        with pyro.plate('components', K):
            # concentration parameters
            concentration = pyro.sample('concentration', 
                                        dist.Gamma(2 * torch.ones(15,ncol), 1/3 * torch.ones(15,ncol)).to_event(2))
        
        with pyro.plate('data', data.shape[0]):
            assignment = pyro.sample('assignment', dist.Categorical(weights))
            #d = dist.Dirichlet(concentration[assignment,:,:].clone().detach()) # .detach() might interfere with backprop
            d = dist.Dirichlet(concentration[assignment,:,:])
            pyro.sample('obs', d.to_event(1), obs=data)
            
    elif norm == 'raw':
        with pyro.plate('components', K):
            alphas = pyro.sample('alpha', dist.Gamma(2 * torch.ones(15,ncol), 1/3 * torch.ones(15,ncol)).to_event(2))
            betas = pyro.sample('beta', dist.Gamma(2 * torch.ones(15,ncol), 1/3 * torch.ones(15,ncol)).to_event(2))
            
        with pyro.plate('data', data.shape[0]):
            assignment = pyro.sample('assignment', dist.Categorical(weights))
            d = dist.Beta(alphas[assignment,:,:], betas[assignment,:,:])
            pyro.sample('obs', d.to_event(2), obs=data)

# function used to initialize model
def initialize(seed,model,data):    
    global global_guide, svi
    pyro.set_rng_seed(seed)
    pyro.clear_param_store()
    if norm == 'norm':
        global_guide = AutoDelta(poutine.block(model, expose=['weights', 'concentration']))
    elif norm == 'raw':
        global_guide = AutoDelta(poutine.block(model, expose=['weights', 'alpha', 'beta']))
    svi = SVI(model, global_guide, optim, loss=elbo)
    return svi.loss(model, global_guide, data)
            
# define a code chunk that does the SVI step for singel variation
def tomtom_svi():
    pyro.clear_param_store()
    
    #declare dataset to be modeled
    dtname = 't{}_{}_{}_3d'.format(target, norm, auto)
    print("running SVI with: {}".format(dtname))
    data = globals()[dtname]
    
    loss, seed = min((initialize(seed,model,data), seed) for seed in range(100))
    initialize(seed,model,data)
    print('seed = {}, initial_loss = {}'.format(seed, loss))
    
    gradient_norms = defaultdict(list)
    for name, value in pyro.get_param_store().named_parameters():
        value.register_hook(lambda g, name=name: gradient_norms[name].append(g.norm().item()))

    losses = []
    for i in range(3000):
        loss = svi.step(data)
        #print(loss)
        losses.append(loss)
        if i % 100 == 0:
            print('.',end = '')
    print('\n final loss: {}\n'.format(losses[-1]))

    map_estimates = global_guide(data)
    weights = map_estimates['weights']
    print('weights = {}'.format(weights.data.numpy()))
    if norm == 'norm':
        concentration = map_estimates['concentration']
        print('concentration = {}'.format(concentration.data.numpy()))
    elif norm == 'raw':
        alpha = map_estimates['alpha']
        print('alphas = {}'.format(alpha.data.numpy()))
        beta = map_estimates['beta']
        print('beta = {}'.format(beta.data.numpy()))
    return seed, map_estimates

def tomtom_mcmc(seed,nsample = 5000, burnin = 1000):
    pyro.clear_param_store()
    pyro.set_rng_seed(seed)

    #declare dataset to be modeled
    dtname = 't{}_{}_{}_3d'.format(target, norm, auto)
    print("running MCMC with: {}".format(dtname))
    data = globals()[dtname]

    nuts_kernel = NUTS(model)

    mcmc = MCMC(nuts_kernel, num_samples=nsample, warmup_steps=burnin)
    mcmc.run(data)
    
    posterior_samples = mcmc.get_samples()
    return posterior_samples
                                     
                                    
In [79]:
with open('tomtom_mcmc.pkl','rb') as input:
    [posterior_samples_self_norm_all,
    posterior_samples_self_norm_noauto,
    posterior_samples_self_raw_all,
    posterior_samples_self_raw_noauto] = pickle.load(input)

Self ratings

Visual Inspections

In [18]:
random.seed(200515)
# umap visualization
# rudimentary test run of umap on self transition data
fit_trans_self_default = umap.UMAP()
u_trans_self_default = fit_trans_self_default.fit_transform(trans_self)
In [19]:
plt.scatter(u_trans_self_default[:,0],u_trans_self_default[:,1])
Out[19]:
<matplotlib.collections.PathCollection at 0x1b65584e400>
In [20]:
fit_trans_self_neighbor30 = umap.UMAP(n_neighbors=15,min_dist=0.05)
u_trans_self_neighbor30 = fit_trans_self_neighbor30.fit_transform(trans_self)
plt.scatter(u_trans_self_neighbor30[:,0],u_trans_self_neighbor30[:,1])
Out[20]:
<matplotlib.collections.PathCollection at 0x1b654262fd0>
In [21]:
# pca to see how many dimensions best capture
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
scaler = StandardScaler()
trans_self_scaled = scaler.fit_transform(trans_self)
decomp = PCA()
trans_self_pca = decomp.fit(trans_self_scaled)
plt.scatter(list(range(np.shape(trans_self_pca.explained_variance_ratio_)[0])),np.array(trans_self_pca.explained_variance_ratio_))
Out[21]:
<matplotlib.collections.PathCollection at 0x1b6529c0b00>
In [22]:
# above pca result suggests 4-6 factors
# here we construct a new umap object with 3 dimensions and visulize it to see if there are obv clusters
fit_trans_self_3d = umap.UMAP(n_neighbors=47,min_dist=0,n_components=3)
u_trans_self_3d = fit_trans_self_3d.fit_transform(trans_self)

# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()

trace = go.Scatter3d(
    x=u_trans_self_3d[:,0],  # <-- Put your data instead
    y=u_trans_self_3d[:,1],  # <-- Put your data instead
    z=u_trans_self_3d[:,2],  # <-- Put your data instead
    mode='markers',
    marker={
        'size': 10,
        'opacity': 0.8,
        #'color': ,
    }
)
# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)
data = [trace]

plot_figure = go.Figure(data=data, layout=layout)
# Render the plot.
plotly.offline.iplot(plot_figure)

Normalized, with Autotransitions

SVI

In [71]:
K = 2
target = 'self' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [72]:
seed_self_norm_all, map_self_norm_all = tomtom_svi()
running SVI with: tself_norm_all_3d
seed = 44, initial_loss = -4241.4052734375
..............................
 final loss: -12607.4443359375

weights = [0.9429489  0.05705118]
concentration = [[[18.360336   9.887376   9.427387  20.314156  18.688635 ]
  [18.6975     8.14305   12.0124235 18.335918  16.054907 ]
  [ 3.278261  15.794228  16.299017  10.724151  14.627304 ]
  [ 5.4933143 23.7035    26.655422   5.201116   3.42356  ]
  [19.910328  13.137211   6.237562  16.123363  22.065937 ]
  [ 8.742342  17.87768   26.29625   25.455116  21.003141 ]
  [21.799572  32.56245    4.751368  20.32355    5.8237166]
  [ 7.259423   8.033038  17.330927  16.006342  18.470438 ]
  [20.772537  18.637081   4.284183   6.471355   4.7371564]
  [ 9.912656  12.299383  25.528433  23.28356   25.246    ]
  [ 3.7038891  4.1638527 19.039509  19.754557  12.822621 ]
  [ 4.404784   2.0620575 20.900562  20.56929    4.622026 ]
  [ 5.768946  12.711373  20.297647  25.578012  30.16177  ]
  [22.2137    22.917488   6.140551   3.6454303 11.288901 ]
  [17.086843  16.147865   5.26022    3.3116062  8.441843 ]]

 [[ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.0000005  3.0000007  3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]]]

MCMC

In [ ]:
posterior_samples_self_norm_all = tomtom_mcmc(seed_self_norm_all)
posterior_samples_self_norm_all['weights'].shape
In [81]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_self_norm_all['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Normalized, no Autotransitions

SVI

In [87]:
K = 2
target = 'self' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [88]:
seed_self_norm_noauto, map_self_norm_noauto = tomtom_svi()
running SVI with: tself_norm_noauto_3d
seed = 81, initial_loss = -2186.4140625
..............................
 final loss: -8585.7421875

weights = [0.05705118 0.9429489 ]
concentration = [[[ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.0000007  3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.000002 ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]]

 [[18.268353   9.589527   9.155447  18.036222 ]
  [11.82444   16.605541  27.000458  23.459406 ]
  [ 5.358502  27.854244  18.34807   24.189775 ]
  [ 5.728013  25.321272   5.397191   3.5620189]
  [30.355797  19.01255    8.918908  23.411695 ]
  [ 8.088181  16.45165   24.274176  18.954166 ]
  [16.22755    3.8332026 15.705589   4.633519 ]
  [ 8.415727   9.383972  20.561676  20.55328  ]
  [24.93583    5.6238585  8.685258   6.258177 ]
  [ 7.4622035  9.248475  19.195898  16.069023 ]
  [ 3.8170018  4.2232094 19.12619   13.136491 ]
  [ 6.380614   2.8913484 31.364788   6.7286215]
  [ 5.73599   12.69877   20.16954   24.395779 ]
  [24.303288   6.7729692  4.041931  12.523495 ]
  [24.16663    7.319207   4.5267835 11.867839 ]]]

MCMC

In [ ]:
posterior_samples_self_norm_noauto = tomtom_mcmc(seed_self_norm_noauto)
posterior_samples_self_norm_noauto['weights'].shape
In [89]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_self_norm_noauto['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Raw, with autotransitions

SVI

In [85]:
K = 2
target = 'self' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [86]:
seed_self_raw_all, map_self_raw_all = tomtom_svi()
running SVI with: tself_raw_all_3d
seed = 46, initial_loss = 12693.0654296875
..............................
 final loss: -11564.2880859375

weights = [0.05705118 0.9429489 ]
alphas = [[[ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.0000005  3.         3.       ]
  [ 3.0000007  3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.0000007  3.         3.         3.         3.       ]]

 [[28.388739  19.441656  19.43362   23.270267  25.737667 ]
  [15.7004595 12.673215  21.097595  18.3136    20.419235 ]
  [ 4.6243854 25.423384  35.38552   12.267473  26.465506 ]
  [ 3.5639603 14.467362  16.699032   3.870631   4.315134 ]
  [19.54422   16.33251    6.780778  23.993502  16.778297 ]
  [11.301789  29.455523  25.308706  16.328503  31.620314 ]
  [30.276737  18.943432   2.671945  23.510315   5.1809287]
  [10.465314  10.586651  22.992296  21.412735  22.811293 ]
  [27.954317  24.547686   4.9703193  6.1243114  5.9292326]
  [12.645939  16.452532  15.467956  22.850004  26.372906 ]
  [ 3.2305176  5.096125  19.02637   18.416826  13.30365  ]
  [ 5.260877   1.9769851 30.257057  16.379436   4.623541 ]
  [ 4.8120284 15.920653  10.320574  11.561948  16.135416 ]
  [12.849543  21.635172   6.0533905  3.1281555 14.399462 ]
  [21.6389    30.19907    5.487404   4.5420566 21.488794 ]]]
beta = [[[ 3.         3.         3.         3.         3.       ]
  [ 3.0000005  3.         3.         3.         3.       ]
  [ 3.0000007  3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.000002   3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.0000007]
  [ 3.         3.         3.         3.         3.       ]]

 [[ 2.9350183 24.040386  25.272678   1.6667666  3.6427011]
  [ 0.9154133 20.100483  16.244259   2.49104    6.54481  ]
  [23.624918   2.6426167  6.699677  10.41967    8.778327 ]
  [15.223972   3.0052686  2.477967  17.93481   34.70529  ]
  [ 2.1437469 13.102677  19.02132   11.720478   1.5258031]
  [26.601255  19.159933   2.0601273  2.2768602 12.678494 ]
  [20.570545   1.4264281 16.143707  18.829248  26.737032 ]
  [19.53446   16.640574   4.5195713  3.9449694  2.723329 ]
  [ 1.9231623  4.789922  21.967133  15.335095  23.282043 ]
  [22.777647  20.606384   1.4196259  4.486048   1.2470531]
  [15.661574  22.650778   1.929366   2.5087497 10.417388 ]
  [23.4261    20.558289   2.736535   2.2758598 19.034468 ]
  [22.91793   26.448374   7.31034    4.0892463  1.2782795]
  [ 1.5530981  1.70976   18.821012  17.905693  17.867369 ]
  [ 1.843502   1.0032741 13.599766  21.447954  25.842499 ]]]

MCMC

In [ ]:
posterior_samples_self_raw_all = tomtom_mcmc(seed_self_raw_all)
posterior_samples_self_raw_all['weights'].shape
In [90]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_self_raw_all['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Raw, without autotransitions

SVI

In [91]:
K = 2
target = 'self' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [93]:
seed_self_raw_noauto, map_self_raw_noauto = tomtom_svi()
running SVI with: tself_raw_noauto_3d
seed = 6, initial_loss = 6705.6806640625
..............................
 final loss: -8877.1318359375

weights = [0.05705118 0.9429489 ]
alphas = [[[ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.000002   3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]]

 [[12.582976  23.043362  17.695677  16.754545 ]
  [15.574501  19.232304  18.36303   27.706814 ]
  [ 5.489986  14.676224  21.670364  20.619196 ]
  [ 5.015178  19.370768   4.3699636  3.2517579]
  [19.174606  19.988241   6.720402  31.505795 ]
  [10.613559  21.482677  19.865273  21.034477 ]
  [16.833279   2.3538253 17.500385   4.7816324]
  [12.310332  14.85932   23.762896  25.401487 ]
  [29.69002    4.3696404  7.9889183  5.737582 ]
  [ 9.999598  17.183655  12.4985075 25.256096 ]
  [ 3.434004   5.2959485 15.849324  23.70921  ]
  [ 5.0767136  1.937626  24.106276   4.2409887]
  [ 4.0381193 15.214948  17.935148  22.07953  ]
  [18.354546   5.962914   3.6570008 17.974566 ]
  [22.732275  10.007886   5.502976  20.362617 ]]]
beta = [[[ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.0000005   3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]
  [ 3.          3.          3.          3.        ]]

 [[ 1.6649867  28.04411    22.927326    2.501706  ]
  [24.72088    14.903869    2.5155308   8.738113  ]
  [28.493546    3.0571253  17.501137    6.9576902 ]
  [22.258354    3.8888812  20.453674   25.155466  ]
  [ 2.1019804  16.703169   18.878029   15.206629  ]
  [24.941252   14.10101     1.7129086   8.606221  ]
  [11.646988   14.067041   13.475561   24.483467  ]
  [23.09177    23.609653    4.319242    2.9775121 ]
  [ 5.7085624  19.070274   20.464014   22.53802   ]
  [17.79328    22.089436    1.2392372   4.9019012 ]
  [17.04776    23.85589     2.2096286  17.695465  ]
  [22.608095   19.65619     2.2544584  17.246     ]
  [18.743168   25.221796   12.291551    7.417479  ]
  [ 2.0441403  18.558666   21.415394   22.430061  ]
  [ 0.96355635 25.925718   26.4436     24.764122  ]]]

MCMC

In [ ]:
posterior_samples_self_raw_noauto = tomtom_mcmc(seed_self_raw_noauto)
posterior_samples_self_raw_noauto['weights'].shape
In [92]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_self_raw_noauto['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Target-specific ratings

Visual Inspections

In [19]:
random.seed(200506)
# umap visualization
# rudimentary test run of umap on self transition data
fit_trans_targ_default = umap.UMAP()
u_trans_targ_default = fit_trans_targ_default.fit_transform(trans_targ)
C:\Users\zhaoz\AppData\Local\Continuum\anaconda3\lib\site-packages\numba\np\ufunc\parallel.py:355: NumbaWarning:

The TBB threading layer requires TBB version 2019.5 or later i.e., TBB_INTERFACE_VERSION >= 11005. Found TBB_INTERFACE_VERSION = 11000. The TBB threading layer is disabled.

In [39]:
plt.scatter(u_trans_targ_default[:,0],u_trans_targ_default[:,1])
Out[39]:
<matplotlib.collections.PathCollection at 0x1b658923940>
In [40]:
fit_trans_targ_neighbor30 = umap.UMAP(n_neighbors=30,min_dist=0.05)
u_trans_targ_neighbor30 = fit_trans_targ_neighbor30.fit_transform(trans_targ)
plt.scatter(u_trans_targ_neighbor30[:,0],u_trans_targ_neighbor30[:,1])
Out[40]:
<matplotlib.collections.PathCollection at 0x1b658dd5240>
In [41]:
# pca to see how many dimensions best capture
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
scaler = StandardScaler()
trans_targ_scaled = scaler.fit_transform(trans_targ)
decomp = PCA()
trans_targ_pca = decomp.fit(trans_targ_scaled)
plt.scatter(list(range(np.shape(trans_targ_pca.explained_variance_ratio_)[0])),np.array(trans_targ_pca.explained_variance_ratio_))
Out[41]:
<matplotlib.collections.PathCollection at 0x1b652d00908>
In [42]:
# above pca result suggests 4-6 factors
# here we construct a new umap object with 3 dimensions and visulize it to see if there are obv clusters
fit_trans_targ_3d = umap.UMAP(n_neighbors=47,min_dist=0,n_components=3)
u_trans_targ_3d = fit_trans_targ_3d.fit_transform(trans_targ)

# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()

trace = go.Scatter3d(
    x=u_trans_targ_3d[:,0],  # <-- Put your data instead
    y=u_trans_targ_3d[:,1],  # <-- Put your data instead
    z=u_trans_targ_3d[:,2],  # <-- Put your data instead
    mode='markers',
    marker={
        'size': 10,
        'opacity': 0.8,
        #'color': ,
    }
)
# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)
data = [trace]

plot_figure = go.Figure(data=data, layout=layout)
# Render the plot.
plotly.offline.iplot(plot_figure)

Target specific ratings, normalized, with autotransitions

SVI

In [94]:
K = 2
target = 'targ' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [95]:
seed_targ_norm_all, map_targ_norm_all = tomtom_svi()
running SVI with: ttarg_norm_all_3d
seed = 60, initial_loss = -5360.37890625
..............................
 final loss: -12616.2744140625

weights = [0.05705118 0.9429489 ]
concentration = [[[ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.0000007  3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]]

 [[30.29195   11.598536   7.432845  25.695171  27.75862  ]
  [16.117245   4.988022   5.370252  17.048687  16.308735 ]
  [14.748011  21.881487  19.190256   8.427593   9.841498 ]
  [ 7.4333415 17.835655  19.426287  13.7225275 11.529222 ]
  [17.86233   12.240283   7.6770177 22.393492  23.938122 ]
  [ 7.38322    6.07665   19.426403  15.021889  19.689104 ]
  [25.06802   28.497396   3.1489744 15.155916   4.4829903]
  [ 8.137646   5.722582  19.043705  18.610151  21.21456  ]
  [19.763323  19.987282   8.167741   4.3431945 11.366734 ]
  [18.51022    9.899772  26.834742  23.756042  28.417234 ]
  [ 5.61079    7.5298643 23.334547  20.566029  19.697723 ]
  [ 3.9232428  8.525652  20.674866  18.67611   15.397802 ]
  [ 5.242027  13.711009  18.556423  19.302574  25.731586 ]
  [25.10388   26.026548   4.462358   8.475573  13.323829 ]
  [17.358303  21.48653    6.096048   7.963897   6.788324 ]]]

MCMC

In [ ]:
posterior_samples_targ_norm_all = tomtom_mcmc(seed_targ_norm_all)
posterior_samples_targ_norm_all['weights'].shape
In [ ]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_targ_norm_all['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Target specific ratings, normalized, No autotransitions

SVI

In [96]:
K = 2
target = 'targ' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [97]:
seed_targ_norm_noauto, map_targ_norm_noauto = tomtom_svi()
running SVI with: ttarg_norm_noauto_3d
seed = 48, initial_loss = -2910.710205078125
..............................
 final loss: -8417.859375

weights = [0.9429489  0.05705118]
concentration = [[[16.235493   6.363547   4.1486125 14.414887 ]
  [ 7.2670507  7.8562646 27.854916  23.431953 ]
  [19.735165  22.738064  10.374075  12.132537 ]
  [ 7.365294  17.781427  12.189513  11.396572 ]
  [11.482184   8.852459   5.620996  16.445728 ]
  [ 8.721779   7.183262  23.109697  22.332348 ]
  [24.695969   3.234765  15.671356   4.626318 ]
  [ 8.696376   6.110342  18.652014  22.732454 ]
  [21.80515    9.349282   4.9076786 12.993173 ]
  [16.159554   8.963062  24.3123    21.742153 ]
  [ 4.203985   5.611669  15.361769  13.981101 ]
  [ 4.1268992  8.994605  21.669893  15.125506 ]
  [ 5.7384324 15.012953  19.293924  21.16896  ]
  [18.08796    3.4870691  6.526205  10.18891  ]
  [37.5939    10.705686  14.1278    12.003898 ]]

 [[ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.0000005  3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]]]

MCMC

In [ ]:
posterior_samples_targ_norm_noauto = tomtom_mcmc(seed_targ_norm_noauto)
posterior_samples_targ_norm_noauto['weights'].shape
In [ ]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_targ_norm_noauto['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Target specific ratings, Raw, with autotransitions

SVI

In [98]:
K = 2
target = 'targ' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [99]:
seed_targ_raw_all, map_targ_raw_all = tomtom_svi()
running SVI with: ttarg_raw_all_3d
seed = 46, initial_loss = 7900.109375
..............................
 final loss: -10905.70703125

weights = [0.05705118 0.9429489 ]
alphas = [[[ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.0000005  3.         3.       ]
  [ 3.0000007  3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.0000007  3.         3.         3.         3.       ]]

 [[28.659534  13.716637   6.274544  24.104753  25.948214 ]
  [15.987634   6.322693  10.056811  18.229282  20.317352 ]
  [19.476187  25.412888  35.882076  13.101606  16.13107  ]
  [ 6.7152996 14.622859  16.611914  21.463326  19.432726 ]
  [20.287834  10.443322   7.4895844 22.9724    16.639692 ]
  [13.18772   10.0210705 25.50172   16.628414  30.925219 ]
  [28.247868  19.04197    1.8584521 18.790981   4.0950317]
  [10.935465   5.6059656 22.759863  21.486069  22.666828 ]
  [28.092976  24.486643  12.465151   3.5690634 13.006497 ]
  [25.723557   9.671463  15.531735  22.92807   26.308937 ]
  [ 4.1553917  8.511912  18.761866  18.634523  13.119044 ]
  [ 4.586718  11.8435135 30.47324   16.43024   23.077663 ]
  [ 5.144247  21.616486  10.267169  11.640184  16.135416 ]
  [12.898574  21.372055   3.4803252  7.509213  15.017989 ]
  [21.576643  29.658268   4.741111  10.790299   9.284555 ]]]
beta = [[[ 3.         3.         3.         3.         3.       ]
  [ 3.0000005  3.         3.         3.         3.       ]
  [ 3.0000007  3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.000002   3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.0000007]
  [ 3.         3.         3.         3.         3.       ]]

 [[ 5.129576  28.278973  23.303152   9.350121   5.7699475]
  [ 4.467884  19.797607  28.907461   1.5997502  5.2185483]
  [ 9.996442   2.3400211  9.411117  25.67427   23.679792 ]
  [15.384526   5.8529243  3.3442748 17.9591    23.031294 ]
  [11.331474  12.799731  19.072203   4.8408003  1.8772324]
  [27.099796  27.395514   3.9342809  6.0018277  3.8685591]
  [10.342284   2.8119953 16.064285  23.46217   26.61379  ]
  [19.569614  16.393673   2.1688876  4.8574104  1.6438028]
  [ 2.906278   4.118748  23.34147   15.167245  14.438804 ]
  [16.810366  19.625177   2.1813889  5.4827695  1.5361696]
  [15.728994  22.982487   3.739646   5.12022    5.7828574]
  [23.355108  21.263866   4.615658   2.916142  12.82922  ]
  [22.9414    24.69827    5.9694757  6.1599607  1.2782795]
  [ 2.2125902  1.8200252 18.677418  18.192142  17.921247 ]
  [ 5.823459   3.7628956 13.553737  22.041845  23.958305 ]]]

MCMC

In [ ]:
posterior_samples_targ_raw_all = tomtom_mcmc(seed_targ_raw_all)
posterior_samples_targ_raw_all['weights'].shape
In [ ]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_targ_raw_all['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Target specific ratings, Raw, No autotransitions

SVI

In [100]:
K = 2
target = 'targ' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [101]:
seed_targ_raw_noauto, map_targ_raw_noauto = tomtom_svi()
running SVI with: ttarg_raw_noauto_3d
seed = 12, initial_loss = 3515.34912109375
..............................
 final loss: -8492.7060546875

weights = [0.9429489  0.05705118]
alphas = [[[15.98154    9.046522   4.629273  19.492567 ]
  [ 4.9319973  8.631123  16.203558  17.401558 ]
  [28.178583  22.063139   9.320474  15.352234 ]
  [ 8.909565  20.589756  22.29345   19.914824 ]
  [11.99791    9.851565  10.277933  17.290995 ]
  [10.698539   8.450649  24.995567  17.354664 ]
  [17.299057   1.7120826 14.953574   2.5717728]
  [17.32804    4.839779  20.694088  18.064795 ]
  [29.84428   14.23532    4.7913165 17.676731 ]
  [19.744417   9.500362  23.964893  26.991898 ]
  [ 6.492282   8.075533  24.644491  21.448204 ]
  [ 4.4196906 15.341426  24.107742  24.706469 ]
  [ 4.3004355 18.152966  19.959278  25.655901 ]
  [20.129118   4.998923  17.925898  15.732648 ]
  [21.429047   6.7610226 10.585863   6.0406413]]

 [[ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]]]
beta = [[[ 3.0490553 18.310911  16.702576   4.459231 ]
  [15.033013  24.643373   1.5135722  4.536218 ]
  [14.270599   5.9687266 17.297276  22.394981 ]
  [20.728437   8.05991   17.962969  22.961214 ]
  [ 6.9002485 12.024734  26.613144   3.7490485]
  [21.854046  22.901506   3.8647685  2.4043286]
  [ 6.541405  14.65558   19.311052  15.591089 ]
  [31.427336  13.869628   4.695041   1.3839546]
  [ 4.9235024 26.890366  21.090528  18.757639 ]
  [13.052706  19.24958    3.1131918  6.376843 ]
  [25.752201  21.768353   6.597171   9.182824 ]
  [22.400785  27.802752   3.7548528 13.712206 ]
  [18.757593  20.109922  11.124829  13.044636 ]
  [ 3.192676  27.816683  45.091743  19.56119  ]
  [ 2.8311799 19.990696  21.64177   15.16211  ]]

 [[ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]]]

MCMC

In [ ]:
posterior_samples_targ_raw_noauto = tomtom_mcmc(seed_targ_raw_noauto)
posterior_samples_targ_raw_noauto['weights'].shape
In [ ]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_targ_raw_noauto['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Group level ratings

Visual Inspections

In [102]:
random.seed(200506)
# umap visualization
# rudimentary test run of umap on self transition data
fit_trans_avg_default = umap.UMAP()
u_trans_avg_default = fit_trans_avg_default.fit_transform(trans_avg)
C:\Users\zhaoz\AppData\Local\Continuum\anaconda3\lib\site-packages\numba\np\ufunc\parallel.py:355: NumbaWarning:

The TBB threading layer requires TBB version 2019.5 or later i.e., TBB_INTERFACE_VERSION >= 11005. Found TBB_INTERFACE_VERSION = 11000. The TBB threading layer is disabled.

In [103]:
plt.scatter(u_trans_avg_default[:,0],u_trans_avg_default[:,1])
Out[103]:
<matplotlib.collections.PathCollection at 0x2673c807ac8>
In [104]:
fit_trans_avg_neighbor30 = umap.UMAP(n_neighbors=30,min_dist=0.05)
u_trans_avg_neighbor30 = fit_trans_avg_neighbor30.fit_transform(trans_avg)
plt.scatter(u_trans_avg_neighbor30[:,0],u_trans_avg_neighbor30[:,1])
Out[104]:
<matplotlib.collections.PathCollection at 0x267402429e8>
In [105]:
# pca to see how many dimensions best capture
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
scaler = StandardScaler()
trans_avg_scaled = scaler.fit_transform(trans_avg)
decomp = PCA()
trans_avg_pca = decomp.fit(trans_avg_scaled)
plt.scatter(list(range(np.shape(trans_avg_pca.explained_variance_ratio_)[0])),np.array(trans_avg_pca.explained_variance_ratio_))
Out[105]:
<matplotlib.collections.PathCollection at 0x2673d1dd908>
In [106]:
# above pca result suggests 4-6 factors
# here we construct a new umap object with 3 dimensions and visulize it to see if there are obv clusters
fit_trans_avg_3d = umap.UMAP(n_neighbors=47,min_dist=0,n_components=3)
u_trans_avg_3d = fit_trans_avg_3d.fit_transform(trans_avg)

# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()

trace = go.Scatter3d(
    x=u_trans_avg_3d[:,0],  # <-- Put your data instead
    y=u_trans_avg_3d[:,1],  # <-- Put your data instead
    z=u_trans_avg_3d[:,2],  # <-- Put your data instead
    mode='markers',
    marker={
        'size': 10,
        'opacity': 0.8,
        #'color': ,
    }
)
# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)
data = [trace]

plot_figure = go.Figure(data=data, layout=layout)
# Render the plot.
plotly.offline.iplot(plot_figure)

Group level ratings, normalized, with autotransitions

SVI

In [107]:
K = 2
target = 'avg' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [108]:
seed_avg_norm_all, map_avg_norm_all = tomtom_svi()
running SVI with: tavg_norm_all_3d
seed = 0, initial_loss = -5174.3681640625
..............................
 final loss: -12317.205078125

weights = [0.9429489  0.05705118]
concentration = [[[19.695879   9.246967   3.6861448 18.799387  19.683344 ]
  [19.70663    9.418472   3.3920417 21.6761    19.454601 ]
  [ 6.2971473 21.353851   6.98611   11.606868   8.104199 ]
  [ 4.980947  19.074127  23.152145   5.80566   11.648904 ]
  [25.398483  17.30595   12.280742  25.70769   29.716484 ]
  [12.474096  18.1185    23.127243  13.502753  23.634878 ]
  [18.787895  21.14244    4.616317   5.086805   7.7741814]
  [ 6.5657263  6.1605716 18.902227  18.314436  17.129623 ]
  [14.750198  23.201263   5.900644   5.3420005  4.196345 ]
  [14.510927   8.792581  20.373617  18.231302  19.399946 ]
  [ 3.9636     5.8056216 17.625397  18.115582  11.71921  ]
  [ 5.728256   4.5481043 15.997845  21.735188  18.022945 ]
  [ 5.518043   9.833829  15.098528  17.724651  20.739248 ]
  [21.598667  14.67985    5.1162505  7.8624406  7.042243 ]
  [17.688877  15.129961   5.320805   7.1907525  7.7362385]]

 [[ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]]]

MCMC

In [ ]:
posterior_samples_avg_norm_all = tomtom_mcmc(seed_avg_norm_all)
posterior_samples_avg_norm_all['weights'].shape
In [ ]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_avg_norm_all['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Group level ratings, normalized, No autotransitions

SVI

In [109]:
K = 2
target = 'avg' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [110]:
seed_avg_norm_noauto, map_avg_norm_noauto = tomtom_svi()
running SVI with: tavg_norm_noauto_3d
seed = 7, initial_loss = -2997.260986328125
..............................
 final loss: -8444.224609375

weights = [0.05704859 0.94295144]
concentration = [[[ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.000002   3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]]

 [[19.816658   9.899359   3.9245887 21.356356 ]
  [ 7.6499305  2.8035946 17.478914  14.341135 ]
  [12.656792  14.08597   25.46603   16.426785 ]
  [ 5.5271225 19.865831   6.4017444 12.928355 ]
  [19.287521  14.135185   9.418232  18.297806 ]
  [11.566478  16.884893  21.42566   22.167423 ]
  [17.067505   4.500498   4.9684105  7.581368 ]
  [ 7.093991   6.682546  19.838205  19.28184  ]
  [28.631342   7.006479   6.290411   4.9490647]
  [14.619195   8.8799925 20.571972  17.083801 ]
  [ 4.843748   7.201546  21.649506  14.633768 ]
  [ 6.575886   5.1879063 18.021042  20.612555 ]
  [ 7.7804823 14.044864  21.682384  24.228487 ]
  [34.188725   7.547288  11.738619  10.491128 ]
  [25.682352   8.298146  11.264816  12.213857 ]]]

MCMC

In [ ]:
posterior_samples_avg_norm_noauto = tomtom_mcmc(seed_avg_norm_noauto)
posterior_samples_avg_norm_noauto['weights'].shape
In [ ]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_avg_norm_noauto['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Group level ratings, Raw, with autotransitions

SVI

In [111]:
K = 2
target = 'avg' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [112]:
seed_avg_raw_all, map_avg_raw_all = tomtom_svi()
running SVI with: tavg_raw_all_3d
seed = 4, initial_loss = 8400.17578125
..............................
 final loss: -11182.1171875

weights = [0.05705118 0.9429489 ]
alphas = [[[ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.0000014  3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]]

 [[23.043142  14.469909   6.14574   16.326063  23.89208  ]
  [24.94667   19.357805   3.5449677 17.98268   22.389563 ]
  [ 7.317835  18.183903   8.2791395 18.228012  11.660458 ]
  [ 6.446609  25.23003   24.081352   5.133644  16.235857 ]
  [25.047384  28.713066  18.862745  33.519974  22.903751 ]
  [21.79212   19.835722  23.188671  24.557333  22.093401 ]
  [ 9.777547  21.326595   5.912914   4.74169    7.749933 ]
  [ 9.062979   6.4360776 16.346327  21.201803  21.273867 ]
  [16.005175  12.992807   5.000264   2.7871702  3.0842347]
  [24.180319   9.669284  24.024366  34.589684  22.743633 ]
  [ 4.5159845  6.84958   26.248726  36.578987  15.237183 ]
  [ 5.066871   5.074997  29.667871  25.641825  20.651972 ]
  [ 8.41446   17.337679  27.786894  23.061054  17.177904 ]
  [20.754953  15.57087    4.579412   8.878555   4.452351 ]
  [21.094038  21.964153   4.878831  13.655755  17.137377 ]]]
beta = [[[ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.0000007  3.0000007  3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.0000007  3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.         3.       ]]

 [[ 3.0117464 20.257603  32.19684    1.6778735  2.8123856]
  [ 2.05487   28.47037   20.7001     1.4024163  4.4176717]
  [19.55405    1.3877829 19.22253   14.744154  21.884731 ]
  [29.311914  11.218794   2.5676584 18.69561   22.637108 ]
  [ 6.7201595 21.172762  30.28498    8.27591    1.6386727]
  [25.149979   9.765474   2.45375   24.421535   2.9204118]
  [ 3.5518138  2.300869  26.636301  18.220022  17.098858 ]
  [19.266624  14.694819   1.4814081  2.5195546  1.8087504]
  [16.32575    2.5817564 19.333378  11.326118  17.596209 ]
  [13.98736   15.092448   2.8366814  8.542699   1.9013834]
  [19.79961   18.244095   2.7384431  5.9285626 12.190135 ]
  [16.371616  22.620815  15.868448   2.0842438  6.9662595]
  [30.051529  26.534163  18.102964   9.399084   1.9452   ]
  [ 4.405487  14.695965  20.317745  22.754751  12.754253 ]
  [ 2.0491724  3.4175863 12.578217  23.575914  26.194036 ]]]

MCMC

In [ ]:
posterior_samples_avg_raw_all = tomtom_mcmc(seed_avg_raw_all)
posterior_samples_avg_raw_all['weights'].shape
In [ ]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_avg_raw_all['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()

Group level ratings, Raw, No autotransitions

SVI

In [113]:
K = 2
target = 'avg' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
In [114]:
seed_avg_raw_noauto, map_avg_raw_noauto = tomtom_svi()
running SVI with: tavg_raw_noauto_3d
seed = 12, initial_loss = 4030.15234375
..............................
 final loss: -8567.099609375

weights = [0.9429489  0.05705118]
alphas = [[[15.9245825 13.461962   3.3895774 19.29943  ]
  [10.618537   4.0763054 16.357298  17.335419 ]
  [ 7.660074   7.8049903 23.99921   11.6071415]
  [ 4.6123524 20.740139   4.5680604 16.588617 ]
  [11.828557  17.597958  17.333204  17.340185 ]
  [20.180588  29.229286  24.878311  17.354664 ]
  [17.224686   3.438892   4.890964   7.227572 ]
  [14.126082   6.151453  20.5148    18.080854 ]
  [29.896412   6.3735433  4.7913165  3.0368931]
  [19.447577  12.54905   23.938145  27.038015 ]
  [ 5.713401   8.075533  24.46221   22.781866 ]
  [ 6.8497233  5.9328895 25.92089   23.823652 ]
  [ 5.462638  12.93971   20.0832    25.203949 ]
  [20.203966   6.204622  16.925175   7.6071367]
  [21.473434   7.4901147 12.656891  10.228222 ]]

 [[ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]]]
beta = [[[ 2.2078443 18.798819  16.624842   2.372875 ]
  [15.333512  24.329573   1.4441887  3.5154054]
  [20.627342  18.064466  18.652388  21.706783 ]
  [20.243286   9.324781  16.490028  22.40943  ]
  [ 3.4141796 12.5026    27.989496   4.521246 ]
  [23.257982  14.1381855  2.6157043  2.4043286]
  [ 5.9058995 14.634427  18.853771  15.855674 ]
  [30.537949  13.951901   2.4830017  1.5974001]
  [ 5.32938   25.196692  21.090528  17.235575 ]
  [11.3707075 19.769186   2.81625    6.791773 ]
  [25.670317  21.768353   4.101834  19.012066 ]
  [22.669611  26.828953  14.385132   7.965597 ]
  [18.84256   19.662842  13.197323  10.226995 ]
  [ 4.296244  28.018494  44.701458  22.653435 ]
  [ 3.380587  20.055798  21.819065  15.376851 ]]

 [[ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]
  [ 3.         3.         3.         3.       ]]]

MCMC

In [ ]:
posterior_samples_avg_raw_noauto = tomtom_mcmc(seed_avg_raw_noauto)
posterior_samples_avg_raw_noauto['weights'].shape
In [ ]:
#X = posterior_samples['concentration'][:,0,0,0]
X = posterior_samples_avg_raw_noauto['weights'][:,0]

#pyplot.figure(figsize=(8, 8), dpi=100).set_facecolor('white')
pyplot.plot(np.arange(X.numpy().size),X.numpy())
#pyplot.contour(np.log(h + 3).T, extent=[xs.min(), xs.max(), ys.min(), ys.max()],
#               colors='white', alpha=0.8)
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()